1. Load/ install all the required packages and custom scripts

This code can be found in STExplorer’s GitHub repo as load_packages.R

## 1 Bioconductor ----
pkgBio <- c("Spaniel", "scater", 
            "batchelor", "scran", "DESeq2")

if (!require("BiocManager", quietly = TRUE))
  install.packages("BiocManager")

## Check if packages are installed and load them or install&load them if not.
pkg.check <- lapply(
  pkgBio, 
  FUN <- function(x) {
    if (!require(x, character.only = TRUE)) {
      BiocManager::install(x, update = FALSE)
      library(x, character.only = TRUE)
    } 
  }
)


## 2 Cran ----
pkgCRAN <- c("Seurat", "cowplot", 
             "RColorBrewer",
             "harmony", "dplyr", 
             "spdep", "sf", "jsonlite",
             "tidyverse")

## Check if packages are installed and load them or install&load them if not.
pkg.check <- lapply(
  pkgCRAN, 
  FUN <- function(x) {
    if (!require(x, character.only = TRUE)) {
      install.packages(x, dependencies = TRUE)
      library(x, character.only = TRUE)
    }
  }
)

## 3 GitHub ----
pkgGit <- c("RachelQueen1/SCFunctionsV3",
            "eddelbuettel/rbenchmark")

if (!require("devtools", quietly = TRUE))
  install.packages("devtools")

## Check if packages are installed and load them or install&load them if not.
pkg.check <- lapply(
  pkgGit, 
  FUN <- function(x) {
    pkg.name <- sub(".*/", "", x)
    if (!require(pkg.name, character.only = TRUE)) {
      devtools::install_git(paste0("https://github.com/", x),
                            force = TRUE)
      library(pkg.name, character.only = TRUE)
    }
  }
)

## 4 source scripts ----
source("./R/sf_coord_as_df.R")
source("./R/sfc_coord_as_df.R")
source("./R/readSpacerangerMD.R")
source("./R/readSpacerangerD.R")

2. Make the required folders

This code can be found in STExplorer’s GitHub repo as make_folders.R

## Set file paths
projDir <- file.path(getwd(), "data")
inputDir <- file.path(projDir, "spaceranger_outs/")
outputDir <- file.path(projDir, "rObjects/")
csvDir <- file.path(projDir, "csvFiles/")
graphDir <- file.path(projDir, "graphics_out/")

## Check if inputDir/ outputDir/ csvDir exist and create them if not.
dirs <- c(inputDir, outputDir, csvDir, graphDir)

dirCheck <- lapply(
    dirs,
    FUN = function(x){
        if(!dir.exists(x)) {
            dir.create(x, recursive = TRUE)
            print(paste0("Folder: ", x, " CREATED!"))
        } else {
            print(paste0("Folder: ", x, " EXISTS!"))
        }
    }
)
## [1] "Folder: /Users/b9047753/Library/CloudStorage/OneDrive-NewcastleUniversity/Projects/STExplorer/data/spaceranger_outs/ EXISTS!"
## [1] "Folder: /Users/b9047753/Library/CloudStorage/OneDrive-NewcastleUniversity/Projects/STExplorer/data/rObjects/ EXISTS!"
## [1] "Folder: /Users/b9047753/Library/CloudStorage/OneDrive-NewcastleUniversity/Projects/STExplorer/data/csvFiles/ EXISTS!"
## [1] "Folder: /Users/b9047753/Library/CloudStorage/OneDrive-NewcastleUniversity/Projects/STExplorer/data/graphics_out/ EXISTS!"

3. Find neighbours

This code can be found in STExplorer’s GitHub repo as find_nb_pixelXY.R

3.1 Import the MetaData and the Data to be used

## set the file paths to spaceranger's spatial and gene expression folders
sampleDir <- "Olfactory_Bulb/Olfactory_Bulb_A1_Results"
spatialDir <- file.path(inputDir, sampleDir, "spatial")
countsDir <- file.path(inputDir, sampleDir, "filtered_feature_bc_matrix")

## Import the dataset
inputMD <- readSpacerangerMD(spatialDir, res = "low") #read-in MetaData
inputD <- readSpacerangerD(countsDir) #read-in gene expression Data

3.2 Generate an enveloped Voronoi Tessellation for the whole capture area

As capture area we refer to the total number of spots that are on the slide

## Select spots in both bins (Sections) 0 and 1
spot_position <- inputMD %>% 
    select(c("Barcode", "pixel_x", "pixel_y", "Section"))
head(spot_position, 5)
##              Barcode pixel_x pixel_y Section
## 1 ACGCCTGACACGCGCT-1   66.12  520.08       0
## 2 TACCGATCCAACACTT-1   72.00  516.66       0
## 3 ATTAAAGCGGACGAGC-1   66.12  513.24       0
## 4 GATAAGGGACGATTAG-1   72.00  509.88       0
## 5 GTGCAAATCACCAATA-1   66.12  506.46       0
## Convert spots to centroids
centroids <- spot_position %>% 
  st_as_sf(coords = c("pixel_x", "pixel_y"), 
           remove = FALSE)
head(centroids, 5)
## Simple feature collection with 5 features and 4 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 66.12 ymin: 506.46 xmax: 72 ymax: 520.08
## CRS:           NA
##              Barcode pixel_x pixel_y Section             geometry
## 1 ACGCCTGACACGCGCT-1   66.12  520.08       0 POINT (66.12 520.08)
## 2 TACCGATCCAACACTT-1   72.00  516.66       0    POINT (72 516.66)
## 3 ATTAAAGCGGACGAGC-1   66.12  513.24       0 POINT (66.12 513.24)
## 4 GATAAGGGACGATTAG-1   72.00  509.88       0    POINT (72 509.88)
## 5 GTGCAAATCACCAATA-1   66.12  506.46       0 POINT (66.12 506.46)
## Combine the points into a multipoint geometry:
cntd_union <- st_union(centroids)
head(cntd_union)
## Geometry set for 1 feature 
## Geometry type: MULTIPOINT
## Dimension:     XY
## Bounding box:  xmin: 65.64 ymin: 88.08 xmax: 521.34 ymax: 520.08
## CRS:           NA
## MULTIPOINT ((65.64 91.98), (65.64 98.76), (65.7...
## Use the union of points to generate a voronoi object
voronoi <- st_voronoi(cntd_union, bOnlyEdges = TRUE)
head(voronoi)
## Geometry set for 1 feature 
## Geometry type: MULTILINESTRING
## Dimension:     XY
## Bounding box:  xmin: -390.06 ymin: -367.62 xmax: 977.04 ymax: 975.78
## CRS:           NA
## MULTILINESTRING ((69.57727 91.95, 67.64273 95.3...
## Create an enveloped voronoi tessellation around the tissue
voronoi_env <- st_intersection(st_cast(voronoi), st_convex_hull(cntd_union))
head(voronoi_env)
## Geometry set for 1 feature 
## Geometry type: MULTILINESTRING
## Dimension:     XY
## Bounding box:  xmin: 65.64 ymin: 88.08 xmax: 521.34 ymax: 520.08
## CRS:           NA
## MULTILINESTRING ((69.57727 91.95, 67.64273 95.3...

Plot and save the Voronoi tessellation

3.3 Polygonise the tessellation only for the bin_1 spots

As bin_1 spots we refer to the on-tissue spots.

## Generate the POLYGONS from the MULTILINESTRING for bin_1 only and attach the  
##  barcode names
polygons <- st_polygonize(voronoi_env) %>% # polygonise the tessellation
    st_cast() %>% # convert GEOMETRYCOLLECTION to multiple POLYGONS
    st_sf() %>%  # convert sfc object to sf for st_join afterwards
    st_join(., 
            centroids[centroids$Section == 1,],
            join = st_contains,
            left = FALSE) %>% # Join the centroids with the POLYGONS
    mutate(Barcode_rn = Barcode) %>% # duplicate the barcode column
    column_to_rownames("Barcode_rn") %>% # move duplicate column to row names
    st_sf() # convert back to sf (mutate makes it a df)

3.4 Find neighbours based on adjacency

## Create contiguity neighbours
neighbours <- poly2nb(polygons, snap = 0)
names(neighbours) = attr(neighbours, "region.id") # add names to the sub-lists

3.5 Make one data frame to contain all the information

## Add number of neighbours for each polygon back to the polygons object
polygons$nb_count <- card(neighbours)

## Add the neighbour (nb) IDs as a nested df in the polygons object
nb_IDs <- neighbours %>%
    nb2lines(., coords = polygons$geometry) %>% #get nb connecting lines 
    as("sf") %>% #convert to sf
    st_drop_geometry() %>% #drop geometry column
    select(i_ID, j_ID) %>% #select only nb ID columns
    rename(nb_IDs = j_ID) %>% #rename the neighbours ID column
    group_by(i_ID) %>% #group by spot
    nest() #nest the groupings

polygons <- right_join(polygons, nb_IDs, by = c("Barcode" = "i_ID")) %>%
    rename(nb_IDs = data, geom_pol = geometry)


## Update the polygon object to keep the centroid geometries as well
polygons <- left_join(as.data.frame(polygons), as.data.frame(centroids), 
                      by = c("Barcode" = "Barcode"), suffix = c("", ".y")) %>%
    select(!ends_with(".y")) %>% 
    rename(geom_cntd = geometry) %>% 
    st_sf(sf_column_name = "geom_pol")

Plot the polygons

3.5 Calculate neighbour weights

## Calculate neighbour weights with a distance decay function
neighbours_wght <- nb2listwdist(neighbours, polygons$geom_cntd,
                                type = "idw", style = "raw", alpha = 1)
head(neighbours_wght$weights, 5)
## [[1]]
## [1] 0.1461988 0.1474926 0.1470099 0.1476604
## 
## [[2]]
## [1] 0.1461988 0.1474926 0.1470099 0.1476604
## 
## [[3]]
## [1] 0.1461988 0.1458960 0.1465318
## 
## [[4]]
## [1] 0.1474926 0.1474926 0.1470099 0.1476604
## 
## [[5]]
## [1] 0.1461988 0.1474926 0.1458960 0.1465318

4. Import gene counts and normalise them

This code can be found in STExplorer’s GitHub repo as find_nb_pixelXY.R

## Import gene counts
inputD <- readSpacerangerD(countsDir)

## Prepare for gene expression normalisation using DESeq2
spotName <- colnames(inputD)
spotTable <- data.frame(spotName = spotName)

dds <- DESeqDataSetFromMatrix(countData = inputD,
                              colData = spotTable,
                              design = ~spotName)
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]
dds = estimateSizeFactors(dds) # Estimate size factors
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]
head(sizeFactors(dds), 10) # Print the first 10 size factors
## AAACAAGTATCTCCCA-1 AAACCGGGTAGGTACC-1 AAACCGTTCGTCCAGG-1 AAACGAGACGGTTGAT-1 
##          0.1355829          3.3937026          1.8759736          1.3502440 
## AAACTGCTGGCTCCAA-1 AAACTTGCAAACGTAT-1 AAAGGCTCTCGCGCCG-1 AAAGTAGCATTGCTCA-1 
##          1.7467547          0.2813147          1.4772664          0.6734527 
## AAAGTGTGATTTATCT-1 AAAGTTGACTCCCGTA-1 
##          1.7160711          1.1288676
counts = counts(dds, normalized = TRUE) # export normalised counts
head(counts, c(10,5)) # Print the first 10 gene counts
##                    AAACAAGTATCTCCCA-1 AAACCGGGTAGGTACC-1 AAACCGTTCGTCCAGG-1
## ENSMUSG00000051951                  0          0.0000000          0.0000000
## ENSMUSG00000089699                  0          0.0000000          0.0000000
## ENSMUSG00000102331                  0          0.0000000          0.0000000
## ENSMUSG00000102343                  0          0.0000000          0.0000000
## ENSMUSG00000025900                  0          0.0000000          0.0000000
## ENSMUSG00000025902                  0          0.0000000          0.0000000
## ENSMUSG00000104238                  0          0.0000000          0.0000000
## ENSMUSG00000104328                  0          0.0000000          0.0000000
## ENSMUSG00000033845                  0          0.8839902          0.5330565
## ENSMUSG00000025903                  0          0.0000000          1.5991696
##                    AAACGAGACGGTTGAT-1 AAACTGCTGGCTCCAA-1
## ENSMUSG00000051951          0.0000000            0.00000
## ENSMUSG00000089699          0.0000000            0.00000
## ENSMUSG00000102331          0.0000000            0.00000
## ENSMUSG00000102343          0.0000000            0.00000
## ENSMUSG00000025900          0.0000000            0.00000
## ENSMUSG00000025902          0.0000000            0.00000
## ENSMUSG00000104238          0.0000000            0.00000
## ENSMUSG00000104328          0.0000000            0.00000
## ENSMUSG00000033845          0.7406069            1.14498
## ENSMUSG00000025903          0.7406069            0.00000